/* Copyright 2012 Tobias Marschall
 *
 * This file is part of CLEVER.
 *
 * CLEVER is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CLEVER is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CLEVER.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <sstream>

#include <boost/unordered_set.hpp>

#include "../NfaMatcher.h"
#include "SplitMapping.h"
#include "WholeMapping.h"
#include "Read.h"
#include "InterSplitMapping.h"

using namespace std;

Read::Read() : is_sorted(false) {
}

Read::Read(const Read&) {
	// forbid copy construction
	assert(false);
}


Read::~Read() {
	for (size_t i=0; i<mappings.size(); ++i) {
		if (mappings[i] != 0) delete mappings[i];
	}
	for (size_t i=0; i<left_anchors.size(); ++i) {
		if (left_anchors[i].alignment != 0) delete left_anchors[i].alignment;
	}
	for (size_t i=0; i<right_anchors.size(); ++i) {
		if (right_anchors[i].alignment != 0) delete right_anchors[i].alignment;
	}
}

auto_ptr<Read::anchor_search_regions_t> Read::determineAnchorSearchRegions(int search_distance, int min_anchor_length, const Read* mate) {
	assert(search_distance > 0);
	auto_ptr<anchor_search_regions_t> result(new anchor_search_regions_t());
	// iterate over all right anchors, to determine where to search for new left anchors
	for (size_t i=0; i<right_anchors.size(); ++i) {
		const anchor_t& a = right_anchors[i];
		if (a.whole_read) continue;
		int end = a.position - a.length - min_anchor_length + 1;
		int start = end - search_distance + 1;
		if (a.reverse) {
			result->left_reverse.add(a.ref_id, start, end);
		} else {
			result->left.add(a.ref_id, start, end);
		}
	}
	// iterate over all left anchors, to determine where to search for new right anchors
	for (size_t i=0; i<left_anchors.size(); ++i) {
		const anchor_t& a = left_anchors[i];
		if (a.whole_read) continue;
		int start = a.position + a.length + min_anchor_length - 1;
		int end = start + search_distance - 1;
		if (a.reverse) {
			result->right_reverse.add(a.ref_id, start, end);
		} else {
			result->right.add(a.ref_id, start, end);
		}
	}
	// add positions based on known anchors of mate
	if (mate != 0) {
		// iterate over mate's right anchors
		for (size_t i=0; i<mate->right_anchors.size(); ++i) {
			const anchor_t& a = mate->right_anchors[i];
			if (a.reverse) continue;
			int start = a.position + 1;
			int end = start + search_distance - 1;
			result->left_reverse.add(a.ref_id, start, end);
		}
		// iterate over mate's left anchors
		for (size_t i=0; i<mate->left_anchors.size(); ++i) {
			const anchor_t& a = mate->left_anchors[i];
			if (!a.reverse) continue;
			int start = a.position - search_distance;
			int end = a.position - 1;
			result->right.add(a.ref_id, start, end);
		}
	}
	return result;
}

void Read::findNewAnchors(Read::anchor_type_t anchor_type, Read::strandedness_t strand, const OverlappingRegions& regions, const std::vector<NamedDnaSequence*>& references, int anchor_length, int allowed_errors) {
	NfaMatcher* matcher = 0;
	if (anchor_type == LEFT_ANCHOR) {
		if (strand == FORWARD) matcher = new NfaMatcher(seq, anchor_length, anchor_length - 1, true);
		else matcher = new NfaMatcher(seq.reverseComplement(), anchor_length, anchor_length - 1, true);
	} else {
		if (strand == FORWARD) matcher = new NfaMatcher(seq, anchor_length, seq.size()-anchor_length, false);
		else matcher = new NfaMatcher(seq.reverseComplement(), anchor_length, seq.size()-anchor_length, false);
	}
	const vector<OverlappingRegions::interval_t>& intervals = regions.list();
	vector<OverlappingRegions::interval_t>::const_iterator it = intervals.begin();
	for (; it != intervals.end(); ++it) {
		const NamedDnaSequence& ref = *references[it->chromosome_id];
		// start search earlier by offset such that all matches that end in given
		// region can be found
		int offset = anchor_length + allowed_errors - 1;
		int pos = anchor_type==LEFT_ANCHOR ? min(it->end+offset,(int)ref.size()-1) : max(it->start-offset,0);
		int max_length = it->end - it->start + 1 + offset;
//		if (anchor_type==LEFT_ANCHOR) {
//			cerr << "  Searching " << it->chromosome_id << "," << pos-max_length+1 << "-" << pos << " from right to left for a LEFT ANCHOR (" << (strand==FORWARD?"FORWARD":"REVERSE") << ')' << endl;
//		} else {
//			cerr << "  Searching " << it->chromosome_id << "," << pos << "-" << (pos+max_length-1) << " from left to right for a RIGHT ANCHOR (" << (strand==FORWARD?"FORWARD":"REVERSE") << ')' << endl;
//		}
		auto_ptr<vector<pair<size_t,size_t> > > matches = matcher->findMatches(ref, allowed_errors, pos, max_length);
		for (size_t i = 0; i<matches->size(); ++i) {
//			cerr << "   found match at " << matches->at(i).first << ", distance " << matches->at(i).second << endl;
			// for a set of neighbouring matches, pick leftmost, lowest-distance
			if ((i>0) && (matches->at(i-1).first == matches->at(i).first + (anchor_type==LEFT_ANCHOR?+1:-1))) {
				if (matches->at(i-1).second <= matches->at(i).second) continue;
			}
			if ((i+1<matches->size()) && (matches->at(i+1).first == matches->at(i).first + (anchor_type==LEFT_ANCHOR?-1:+1))) {
				if (matches->at(i).second > matches->at(i+1).second) continue;
			}
			if (anchor_type == LEFT_ANCHOR) {
//				cerr << "   found left anchor at " << matches->at(i).first << endl;
				left_anchors.push_back(anchor_t(it->chromosome_id, matches->at(i).first, strand==REVCOMP, anchor_length, false, false));
			} else {
//				cerr << "   found right anchor at " << matches->at(i).first << endl;
				right_anchors.push_back(anchor_t(it->chromosome_id, matches->at(i).first, strand==REVCOMP, anchor_length, false, false));
			}
		}
	}
	delete matcher;
}


void Read::findNewAnchors(const Read::anchor_search_regions_t& regions, const std::vector<NamedDnaSequence*>& references, int anchor_length, int allowed_errors) {
	findNewAnchors(LEFT_ANCHOR, FORWARD, regions.left, references, anchor_length, allowed_errors);
	findNewAnchors(LEFT_ANCHOR, REVCOMP, regions.left_reverse, references, anchor_length, allowed_errors);
	findNewAnchors(RIGHT_ANCHOR, FORWARD, regions.right, references, anchor_length, allowed_errors);
	findNewAnchors(RIGHT_ANCHOR, REVCOMP, regions.right_reverse, references, anchor_length, allowed_errors);
}

void Read::removeDuplicateAnchors(vector<anchor_t>& anchor_list) {
	if (anchor_list.size() <= 1) return;
	vector<anchor_t> copy(anchor_list.begin(), anchor_list.end());
	sort(copy.begin(), copy.end(), anchor_comparator_t());
	anchor_list.clear();
	for (size_t i = 0; i < copy.size(); ++i) {
		if ((i>0) && (copy[i-1].position == copy[i].position)) continue;
		anchor_list.push_back(copy[i]);
	}
}

void Read::removeDuplicateMappings() {
	if (mappings.size() <= 1) return;
	vector<Mapping*> copy(mappings.begin(), mappings.end());
	sort(copy.begin(), copy.end(), mapping_comparator_t());
	mappings.clear();
	for (size_t i = 0; i < copy.size(); ++i) {
		if ((i+1<copy.size()) && (copy[i+1]->getRefId() == copy[i]->getRefId()) && (copy[i+1]->getStartPosition() == copy[i]->getStartPosition()) && (copy[i+1]->getEndPosition() == copy[i]->getEndPosition())) {
			delete copy[i];
		} else {
			mappings.push_back(copy[i]);
		}
	}
	is_sorted = false;
}

void Read::resetAnchorList(std::vector<anchor_t>& anchor_list) {
	vector<anchor_t> copy(anchor_list.begin(), anchor_list.end());
	anchor_list.clear();
	for (size_t i = 0; i < copy.size(); ++i) {
		if (copy[i].input_anchor) anchor_list.push_back(copy[i]);
	}
}

void Read::resetAnchorList() {
	resetAnchorList(left_anchors);
	resetAnchorList(right_anchors);
}

void Read::clearAnchorList() {
	left_anchors.clear();
	right_anchors.clear();
}

void Read::computeAlignments(const vector<NamedDnaSequence*>& references, const SplitAligner& aligner, int max_span, bool interchromosomal, int max_interchromosomal_pairs) {
	// Only proceed if there is any anchor
	if ((left_anchors.size() == 0) && (right_anchors.size() == 0)) return;
	removeDuplicateAnchors(left_anchors);
	removeDuplicateAnchors(right_anchors);
	// Compute alignments for left anchors
	for (size_t i=0; i<left_anchors.size(); ++i) {
		anchor_t& a = left_anchors[i];
		if (a.reverse) {
//			cerr << "Computing LEFT anchor alignment " << i << " (REVERSE), ref_position: " << a.position << endl;
			assert(a.alignment == 0);
			a.alignment = aligner.computeAnchorAlignment(*references[a.ref_id], seq.reverseComplement(), a.position, SplitAligner::LEFT_ANCHOR).release();
// 			aligner.printTable(*a.alignment, cerr, *references[a.ref_id], seq.reverseComplement());
		} else {
//			cerr << "Computing LEFT anchor alignment " << i << " (FORWARD), ref_position: " << a.position << endl;
			assert(a.alignment == 0);
			a.alignment = aligner.computeAnchorAlignment(*references[a.ref_id], seq, a.position, SplitAligner::LEFT_ANCHOR).release();
// 			aligner.printTable(*a.alignment, cerr, *references[a.ref_id], seq);
		}
	}
	// Compute alignments for right anchors
	for (size_t i=0; i<right_anchors.size(); ++i) {
		anchor_t& a = right_anchors[i];
		if (a.reverse) {
//			cerr << "Computing RIGHT anchor alignment " << i << " (REVERSE), ref_position: " << a.position << endl;
			assert(a.alignment == 0);
			a.alignment = aligner.computeAnchorAlignment(*references[a.ref_id], seq.reverseComplement(), a.position, SplitAligner::RIGHT_ANCHOR).release();
// 			aligner.printTable(*a.alignment, cerr, *references[a.ref_id], seq.reverseComplement());
		} else {
//			cerr << "Computing RIGHT anchor alignment " << i << " (FORWARD), ref_position: " << a.position << endl;
			assert(a.alignment == 0);
			a.alignment = aligner.computeAnchorAlignment(*references[a.ref_id], seq, a.position, SplitAligner::RIGHT_ANCHOR).release();
// 			aligner.printTable(*a.alignment, cerr, *references[a.ref_id], seq);
		}
	}
	// Check whether some of the anchors give rise to whole alignments
	for (size_t i=0; i<left_anchors.size(); ++i) {
		checkForWholeMapping(left_anchors[i], aligner);
	}
	for (size_t i=0; i<right_anchors.size(); ++i) {
		checkForWholeMapping(right_anchors[i], aligner);
	}
	// set of pairs of start and end position of whole alignments
	boost::unordered_set<pair<int,int> > whole_alignment_positions;
	for (size_t i=0; i<mappings.size(); ++i) {
		whole_alignment_positions.insert(make_pair(mappings[i]->getStartPosition(), mappings[i]->getEndPosition()));
	}
	// Combine left and right anchors
	for (size_t i=0; i<left_anchors.size(); ++i) {
		SplitAligner::anchor_alignment_t* left_anchor_aln = left_anchors[i].alignment;
		for (size_t j=0; j<right_anchors.size(); ++j) {
			// if anchors are on different chromosomes, continue
			if (left_anchors[i].ref_id != right_anchors[j].ref_id) continue;
			// also skip those not on the same strand
			if (left_anchors[i].reverse != right_anchors[j].reverse) continue;
			if (left_anchors[i].position >= right_anchors[j].position) continue;
			if (right_anchors[j].position - left_anchors[i].position > max_span) continue;
			if (whole_alignment_positions.find(make_pair(left_anchors[i].position, right_anchors[j].position)) != whole_alignment_positions.end()) {
//				cerr << "Skipping pair: left anchor " << i << " and right anchor " << j << " (whole mapping exists)" << endl;
				continue;
			}
			SplitAligner::anchor_alignment_t* right_anchor_aln = right_anchors[j].alignment;
//			cerr << "Trying to combine left anchor " << i << " and right anchor " << j << endl;
			auto_ptr<vector<SplitAligner::split_alignment_t> > split_alignments = aligner.combineAnchors(*left_anchor_aln, *right_anchor_aln);
//			cerr << "Found " <<  split_alignments->size() << " split alignments!" << endl;
			for (size_t k=0; k<split_alignments->size(); ++k) {
				// cerr << "  (" << split_alignments->at(j).reference_breakpoint << "," << split_alignments->at(j).length << "," << split_alignments->at(j).phred_score << ")";
				// cerr << "chr1 " << (split_alignments->at(k).reference_breakpoint - split_alignments->at(k).length) << " " << split_alignments->at(k).reference_breakpoint << " " << split_alignments->at(k).phred_score << " " << split_alignments->at(k).cigar << endl;
			}
			if (split_alignments->size() > 0) {
				vector<SplitAligner::split_alignment_t>* aln_list = split_alignments.release();
				mappings.push_back(new SplitMapping(i,j,*this,aln_list,aln_list->at(0).phred_score,aln_list->at(0).mismatch_phred_score));
			}
		}
	}
	removeDuplicateMappings();
	// Determine score of best mapping
	int best_score = numeric_limits<int>::max() / 2;
	for (size_t i=0; i<mappings.size(); ++i) {
		if (mappings[i]->getPhredScore() < best_score) best_score = mappings[i]->getPhredScore();
	}
	// Check for interchromosomal mappings
	if (interchromosomal && aligner.doInterchromosomalSearch(best_score)) {
		// Look for pairs with same strandedness
		int n = left_anchors.size() * right_anchors.size();
		if (n <= max_interchromosomal_pairs) {
			for (size_t i=0; i<left_anchors.size(); ++i) {
				SplitAligner::anchor_alignment_t* left_anchor_aln = left_anchors[i].alignment;
				for (size_t j=0; j<right_anchors.size(); ++j) {
					// if anchors are on same chromosomes, continue
					if (left_anchors[i].ref_id == right_anchors[j].ref_id) continue;
					// also skip those not on the same strand
					if (left_anchors[i].reverse != right_anchors[j].reverse) continue;
					SplitAligner::anchor_alignment_t* right_anchor_aln = right_anchors[j].alignment;
					auto_ptr<vector<SplitAligner::interchrom_split_alignment_t> > split_alignments = aligner.combineAnchorsInterchromosomal(*left_anchor_aln, *right_anchor_aln, best_score);
					if (split_alignments->size() > 0) {
						vector<SplitAligner::interchrom_split_alignment_t>* aln_list = split_alignments.release();
						mappings.push_back(new InterSplitMapping(&references,i,true,j,false,*this,aln_list,aln_list->at(0).phred_score,aln_list->at(0).mismatch_phred_score));
					}
				}
			}
		}
		// Look for pairs of left alignments with different strandedness
		n = left_anchors.size() * (left_anchors.size() - 1) / 2;
		if (n <= max_interchromosomal_pairs) {
			for (size_t i=0; i<left_anchors.size(); ++i) {
				SplitAligner::anchor_alignment_t* anchor_aln1 = left_anchors[i].alignment;
				for (size_t j=i+1; j<left_anchors.size(); ++j) {
					// if anchors are on same chromosomes, continue
					if (left_anchors[i].ref_id == left_anchors[j].ref_id) continue;
					// also skip those on the same strand
					if (left_anchors[i].reverse == left_anchors[j].reverse) continue;
					SplitAligner::anchor_alignment_t* anchor_aln2 = left_anchors[j].alignment;
					auto_ptr<vector<SplitAligner::interchrom_split_alignment_t> > split_alignments = aligner.combineAnchorsInterchromosomal(*anchor_aln1, *anchor_aln2, best_score);
					if (split_alignments->size() > 0) {
						vector<SplitAligner::interchrom_split_alignment_t>* aln_list = split_alignments.release();
						mappings.push_back(new InterSplitMapping(&references,i,true,j,true,*this,aln_list,aln_list->at(0).phred_score,aln_list->at(0).mismatch_phred_score));
					}
				}
			}
		}
		// Look for pairs of left alignments with different strandedness
		n = right_anchors.size() * (right_anchors.size() - 1) / 2;
		if (n <= max_interchromosomal_pairs) {
			for (size_t i=0; i<right_anchors.size(); ++i) {
				SplitAligner::anchor_alignment_t* anchor_aln1 = right_anchors[i].alignment;
				for (size_t j=i+1; j<right_anchors.size(); ++j) {
					// if anchors are on same chromosomes, continue
					if (right_anchors[i].ref_id == right_anchors[j].ref_id) continue;
					// also skip those on the same strand
					if (right_anchors[i].reverse == right_anchors[j].reverse) continue;
					SplitAligner::anchor_alignment_t* anchor_aln2 = right_anchors[j].alignment;
					auto_ptr<vector<SplitAligner::interchrom_split_alignment_t> > split_alignments = aligner.combineAnchorsInterchromosomal(*anchor_aln1, *anchor_aln2, best_score);
					if (split_alignments->size() > 0) {
						vector<SplitAligner::interchrom_split_alignment_t>* aln_list = split_alignments.release();
						mappings.push_back(new InterSplitMapping(&references,i,false,j,false,*this,aln_list,aln_list->at(0).phred_score,aln_list->at(0).mismatch_phred_score));
					}
				}
			}
		}
	}
	is_sorted = false;
}

void Read::sortMappings() {
	sort(mappings.begin(), mappings.end(), mapping_comparator_phredbased_t());
	is_sorted = true;
}

void Read::addWholeAlignments(const vector<BamTools::BamAlignment>& alignments, const std::vector<NamedDnaSequence*>& references, bool just_sequence) {
	assert(alignments.size() > 0);
	seq = ShortDnaSequence(alignments[0].QueryBases, alignments[0].Qualities);
	// take care to store the original (unreversed) sequences
	if (alignments[0].IsReverseStrand()) {
		seq = seq.reverseComplement();
	}
	if (just_sequence) return;
/*	if (alignments[0].IsMapped()) {
		for (size_t i=0; i<alignments.size(); ++i) {
			const BamTools::BamAlignment& a = alignments[i];
			if (!a.IsMapped()) continue;
			if (a.RefID < 0) continue;
			if (a.GetEndPosition() - 1 >= (int)references[a.RefID]->size()) continue;
			uint32_t as = 0;
			if (!a.GetTag("AS", as)) {
				ostringstream oss;
				oss << "Read \"" << alignments[0].Name << "\" is lacking an AS tag.";
				throw std::runtime_error(oss.str());
			}
			mappings.push_back(new WholeMapping(a.RefID, a.Position, a.GetEndPosition()-1, a.IsReverseStrand(), a.CigarData,as));
		}
	}
*/
	for (size_t i=0; i<alignments.size(); ++i) {
		const BamTools::BamAlignment& a = alignments[i];
		if (!a.IsMapped()) continue;
		if (a.RefID < 0) continue;
		if (a.GetEndPosition() - 1 >= (int)references[a.RefID]->size()) continue;
		left_anchors.push_back(anchor_t(a.RefID, a.Position, a.IsReverseStrand(), a.Length, true, true));
		right_anchors.push_back(anchor_t(a.RefID, a.GetEndPosition()-1, a.IsReverseStrand(), a.Length, true, true));
	}
}

void Read::addSequenceAndQualities(const std::string& dna, const std::string& qualities) {
	seq = ShortDnaSequence(dna, qualities);
}

void Read::addLeftAnchor(int ref_id, int position, bool reverse, int length, bool whole_read) {
	left_anchors.push_back(anchor_t(ref_id, position, reverse, length, whole_read, true));
}

void Read::addRightAnchor(int ref_id, int position, bool reverse, int length, bool whole_read) {
	right_anchors.push_back(anchor_t(ref_id, position, reverse, length, whole_read, true));
}

void Read::checkForWholeMapping(const anchor_t& anchor, const SplitAligner& aligner) {
	assert(anchor.alignment != 0);
	auto_ptr<SplitAligner::whole_alignment_t> a = aligner.toWholeAlignment(*anchor.alignment);
	if (a.get() == 0) return;
//	cerr << "Found whole mapping from anchor!" << endl;
	mappings.push_back(new WholeMapping(anchor.ref_id, a->start_position, a->end_position, anchor.reverse, a->cigar, a->phred_score, a->mismatch_phred_score));
	is_sorted = false;
}

void Read::filterByPhredScore(int max_phred) {
	assert(is_sorted);
	int keep = 0;
	while ((keep < (int)mappings.size()) && (mappings[keep]->getPhredScore() <= max_phred)) {
		keep += 1;
	}
	for (size_t i=keep; i<mappings.size(); ++i) {
		delete mappings[i];
	}
	mappings.resize(keep);
}

ostream& operator<<(ostream& os, const Read& read) {
	os << "  Left anchors:";
	for (size_t i=0; i<read.left_anchors.size(); ++i) {
		const Read::anchor_t& a = read.left_anchors[i];
		os << " [" << a.ref_id << "," << a.position << ":" << (a.position+a.length-1) << "," << (a.reverse?"R":"F") << "]";
	}
	os << endl;
	os << "  Right anchors:";
	for (size_t i=0; i<read.right_anchors.size(); ++i) {
		const Read::anchor_t& a = read.right_anchors[i];
		os << " [" << a.ref_id << "," << (a.position-a.length+1) << ":" << a.position << "," << (a.reverse?"R":"F") << "]";
	}
	os << endl;
	const std::vector<Mapping*>& mappings = read.getMappings();
	os << "  Mappings (" << mappings.size() << "):" << endl;
	for (size_t i = 0; i < mappings.size(); ++i) {
		//os << "    " << i << ": " << mappings[i]->getRefId() << ", " << mappings[i]->getStartPosition() << " - " << mappings[i]->getEndPosition() << ", PHRED: " << mappings[i]->getPhredScore() << endl;
		os << "    " << i << ": " << (*mappings[i]) << endl;
	}
	return os;
}

ostream& operator<<(ostream& os, const Read::anchor_search_regions_t& regions) {
	os << "  For left anchor (forward):  " << regions.left << endl;
	os << "  For left anchor (revcomp):  " << regions.left_reverse << endl;
	os << "  For right anchor (forward): " << regions.right << endl;
	os << "  For right anchor (revcomp): " << regions.right_reverse << endl;
	return os;
}

